# Libraries
import os
import numpy as np
import random
import statistics
import matplotlib.pyplot as plt
from IPython.display import Image
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
# Show current working directory
print(os.getcwd())
# Output: /Users/darioholenstein/Documents/zhaw/data-analytics/data_analytics/Week_06
# Display the image file "electric_cars.png" with a width of 800
Image("electric_cars.png", width=800)
# Population parameters: mean (km), standard deviation (km) and number of values
p_mean = 320
p_std = 80
p_n = 2500

# Fix NumPy's global seed so the same random numbers are drawn on every run
np.random.seed(42)

# Draw p_n normally distributed values (mean = 320km, standard deviation = 80km)
pop = np.random.normal(p_mean, p_std, p_n)

# Report size, mean and standard deviation of the generated data, one per line
print(
    f'{len(pop):.0f}',
    f'{np.mean(pop):.4f}',
    f'{np.std(pop):.4f}',
    sep='\n',
)

# Histogram of the generated data
plt.hist(pop, bins=50, color='gold')
plt.gca().set_title('Electric range WLTP (simulated data)')
plt.gca().set(xlabel='Range (km)', ylabel='Cars')
plt.show()
# Output: 2500 322.7011 78.6103
# Number of values to draw from the population
sample_size = 100

# Fix the seed of Python's random module so the same sample is drawn on every run
random.seed(42)

# Draw sample_size values from the population (random.sample draws without replacement)
random_sample = random.sample(list(pop), sample_size)

# Report mean and standard deviation of the sample, one per line
print(
    f'Mean: {statistics.mean(random_sample):.4f}',
    f'Standard deviation: {statistics.stdev(random_sample):.4f}',
    sep='\n',
)

# Histogram of the sampled values
plt.hist(random_sample, bins=50, color='deepskyblue')
plt.gca().set_title(f'Electric range WLTP (single sample with n={sample_size})', fontsize=10)
plt.gca().set(xlabel='Range (km)', ylabel='Cars')
plt.show()
# Output: Mean: 323.5502 Standard deviation: 75.5315
def bootstrap(p_mean=320, p_std=80, num_iter=5000, sample_size=200, p_n=2500):
    """
    Simulate a normal population, resample it repeatedly and plot the sample means.

    A population of ``p_n`` values is drawn from a normal distribution. Then
    ``num_iter`` samples of ``sample_size`` values each are drawn from that
    population with ``random.sample`` (i.e. without replacement within a
    sample). The mean of the sample means and the mean of the sample standard
    deviations are printed, and a histogram of the sample means is shown.

    :param p_mean: population mean. Type = int, float
    :param p_std: population standard deviation. Type = int, float
    :param num_iter: number of iterations (samples drawn). Type = int
    :param sample_size: size of a single sample; must not exceed p_n. Type = int
    :param p_n: number of values in the simulated population. Type = int
    :return: None. Prints two summary values and shows a histogram.
    :raises ValueError: if sample_size is larger than p_n.
    """
    # Fail early with a clear message; random.sample would raise the same
    # exception type, but only after the population has been generated.
    if sample_size > p_n:
        raise ValueError(
            f'sample_size ({sample_size}) must not exceed the population size ({p_n})'
        )

    # Seed Python's random module so the drawn samples are reproducible.
    # NOTE: this does not seed NumPy; the population generated below depends
    # on the current state of NumPy's global random generator.
    random.seed(42)

    # Generate (normally distributed) data
    pop = np.random.normal(loc=p_mean, scale=p_std, size=p_n)

    # Convert once outside the loop; the population does not change between iterations
    pop_values = pop.tolist()

    # Create empty lists to save results of iterations
    sample_mean = []
    sample_std = []

    # Loop for iterations
    for _ in range(num_iter):
        samp = random.sample(pop_values, sample_size)
        sample_mean.append(np.mean(samp))
        # np.std uses its default ddof=0 here
        sample_std.append(np.std(samp))

    # Print mean of the sample means and mean of the sample standard deviations
    print(f'{np.mean(sample_mean):.4f}')
    print(f'{np.mean(sample_std):.4f}')

    # Plot histogram of the sample means
    plt.hist(sample_mean, bins=50, color='mediumorchid')
    plt.title('Electric range WLTP (bootstrap sample)', fontsize=10)
    plt.xlabel('Range (km)')
    plt.ylabel('Cars')
    plt.show()
# Function call: 1000 iterations, each drawing a sample of 1000 values
bootstrap(p_mean=320, p_std=80, num_iter = 1000, sample_size = 1000)
# Output: 318.8035 78.6710
import os
import platform
import socket
from platform import python_version
from datetime import datetime
# Print a short report of the runtime environment between two separator lines
print('-----------------------------------')
# Upper-cased name of the OS-dependent module (os.name)
print(os.name.upper())
# System name and release, separated by a pipe
print(f'{platform.system()} | {platform.release()}')
# Current local date and time
print(f'Datetime: {datetime.now():%Y-%m-%d %H:%M:%S}')
# Version of the running Python interpreter
print(f'Python Version: {python_version()}')
print('-----------------------------------')
# Output: ----------------------------------- POSIX Darwin | 24.0.0 Datetime: 2024-11-06 10:57:02 Python Version: 3.11.5 -----------------------------------